{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import binascii\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "import scipy.io as sio\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "PATH = os.getenv('AGI_DATA_HOME') + '/svhn/' # Path to dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the train/test data\n",
    "train_data = sio.loadmat(PATH + 'src/train_32x32.mat')\n",
    "test_data = sio.loadmat(PATH + 'src/test_32x32.mat')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Split features/labels\n",
    "X_train = train_data['X']\n",
    "y_train = train_data['y']\n",
    "\n",
    "X_test = test_data['X']\n",
    "y_test = test_data['y']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X_train: (32, 32, 3, 73257)\n",
      "y_train: (73257, 1)\n",
      "X_test: (32, 32, 3, 26032)\n",
      "y_test: (26032, 1)\n"
     ]
    }
   ],
   "source": [
    "# Data dimensions\n",
    "print('X_train: {0}'.format(X_train.shape))\n",
    "print('y_train: {0}'.format(y_train.shape))\n",
    "print('X_test: {0}'.format(X_test.shape))\n",
    "print('y_test: {0}'.format(y_test.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[    1 13861]\n",
      " [    2 10585]\n",
      " [    3  8497]\n",
      " [    4  7458]\n",
      " [    5  6882]\n",
      " [    6  5727]\n",
      " [    7  5595]\n",
      " [    8  5045]\n",
      " [    9  4659]\n",
      " [   10  4948]]\n"
     ]
    }
   ],
   "source": [
    "y_train_count = np.bincount(y_train.ravel())\n",
    "y_train_labels = np.nonzero(y_train_count)[0]\n",
    "y_train_freq = np.vstack((y_train_labels, y_train_count[y_train_labels])).T\n",
    "print(y_train_freq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[   1 5099]\n",
      " [   2 4149]\n",
      " [   3 2882]\n",
      " [   4 2523]\n",
      " [   5 2384]\n",
      " [   6 1977]\n",
      " [   7 2019]\n",
      " [   8 1660]\n",
      " [   9 1595]\n",
      " [  10 1744]]\n"
     ]
    }
   ],
   "source": [
    "y_test_count = np.bincount(y_test.ravel())\n",
    "y_test_labels = np.nonzero(y_test_count)[0]\n",
    "y_test_freq = np.vstack((y_test_labels, y_test_count[y_test_labels])).T\n",
    "print(y_test_freq)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Label: [1]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<module 'matplotlib.pyplot' from '/Users/Abdel/Developer/anaconda/envs/py27/lib/python2.7/site-packages/matplotlib/pyplot.pyc'>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHN5JREFUeJztnV2MXVd1x//rfNx7ZzwTf+DEcZ2UFCkvCLWhsiKkoooW\nUaWoEvASwQPKA6r70KIitQ8RlQp9o1Wh4gnJlKhpRflQAYEq1AqiVlGlimJoSELTFkoTsON4HMex\nZ8Yz9+Oc1Yd7U9nO/q+59syccbL/P8nynbPvPnudfc665979P2stc3cIIfKj2GsDhBB7g5xfiEyR\n8wuRKXJ+ITJFzi9Epsj5hcgUOb8QmSLnFyJT5PxCZEq1nc5m9gCATwMoAfylu38ien9Zll5VdbKt\naSe8I3kIsa7T+wKAugo+11rjbQV/4tGKdD+LdoegMXi4srWbe/KSjdcGgwUWwoJ+0cOhk0l6r+Om\nuSk76pq3liU/1+akn7XBaHx/bXDQbXtzc1WQ66oILmHWtLYxxHA4jqby/7GbfbzXzEoA/w3gXQBO\nA/gugA+4+3+wPv3+wI/d+cZk20vDC3ywUXrzsWNHaZejhxdpm10J5qbH56O3XCa3VxW/kBa9T9sm\nwYfQ2MhBAwD4eD2kxxsZd7oy2F/lY9o2cX51rpxPH9vZi6u0zyD4wPu5Y/w+tbS8TNvKhp2zddqn\nbfbRts0xn4/VK3weveXzP+inb2KLS/z66JNPhn/85ydw4eW1uZx/O1/77wfwY3f/ibuPAHwRwHu2\nsT8hRIdsx/mPAfjZVX+fnm0TQrwG2NZv/nkwsxMATgBAWe76cEKIOdnOnf8MgLuv+vuu2bZrcPeT\n7n7c3Y+XZfr3lxCie7bj/N8FcK+Z/YKZ9QC8H8A3dsYsIcRuc9Pfw919Yma/B+AfMZX6HnH3H4Z9\nDBgvpFc9fRx8KyjTK6w25IuaRctlwCv1kI8VSEADTysIdbXJ99dwG9uKrwDXTfC5bLytnNz457kX\nG7RtxBe3sfISt//559Pb16L5uG2ND2Zv4G0NN7Ilq+LDCVcINtb4/k6f4dfO5SGfDwRKxuJiuu3Y\nG/i5PHB4kNzehoLptWzrR7i7fxPAN7ezDyHE3qAn/ITIFDm/EJki5xciU+T8QmSKnF+ITOn0kbvS\ngKUiLeldmXBTWiIPrY4v0T5jLNA2D6KvqiqIfqvSwTbW8rHMgyg245KSW4+2lS2XRUdEqmyG/Lie\nP8/nfm2Vy4CrIx58NG7TsuigTUtUALAYBB9VwZU6CVTinqcl3/UrXLL7r+d48NE4mMdeIGNuLvK5\nuniFjFXyse7dR6I32yha8Vp05xciU+T8QmSKnF+ITJHzC5Epcn4hMqXT1X6zAoN+Os2UFTyoo08C\nWYbDYLV5wlc9F6LV8oZPiVm6rXa+PyuD1X6yEg0AFuWRCwJxWppai68cr23y1e2h81XxuuHBMSR7\nFpqC52oc9II8fUFOxipIotiM00vpp3/KlZaNQHnq93kQ19IBfs0d63GVY52s9q+s8/09dzl9XkaB\nknU9uvMLkSlyfiEyRc4vRKbI+YXIFDm/EJki5xciUzqW+oCySmtAUeUgJ3IZKwkFAMOWSzlVwQNx\niiAHXt2mp6vp87EK4zJgPyjKw6pMAcAYPOinYpV+Bvy4fv6uJW7HiFcc+ukKbcIGka8WgupA/ZJX\nWaqM21gG8uHZs+m2ly/z46r2cemzd5Cf6zuOHKJt+ws+3miclm7Xn+WSrp8jczVfpS4AuvMLkS1y\nfiEyRc4vRKbI+YXIFDm/EJki5xciU7Yl9ZnZswBWATQAJu5+PHp/gRJLRLIpJhdov1EvLQ9VQdWt\ndpNLHpNFHmlXBLKRMYkwiCqLPl+9DEpXBf0q8AOfkBx+VnApdYHkgwOAuuayaFlyO0oS1lcUwdwH\ncmQZ2D8ksiIAnH8x3c8H3PZewefjjiUuR962GERillzX7TdpGfDgIpeJXzxLjmv+FH47ovP/mru/\nuAP7EUJ0iL72C5Ep23V+B/BtM/uemZ3YCYOEEN2w3a/9b3f3M2Z2B4Bvmdl/uvvjV79h9qFwAgD6\nPf6IoxCiW7Z153f3M7P/VwB8DcD9ifecdPfj7n68rnjaKiFEt9y085vZPjNbfuU1gN8A8PROGSaE\n2F2287X/CICv2VTmqgD8rbv/Q9TBiha9fWmJpSWlsACg9LSZ45ZHPTWbXPMoFoLPvKCE1qhMt/WC\n3RVBCaemCGRAItkBwBjcxj5JTlqROQR4iS8AaAreb9zwc1YRxdT6fD4Wgl+FXnGJ8OIFLttdYdqX\n8T77B1xiO7y8j7ZZ4E5FEKZZW1q26+/n+xueTScSvQGl7+ad391/AuCXbra/EGJvkdQnRKbI+YXI\nFDm/EJki5xciU+T8QmRKpwk8YYZJkZZRorp1PVJnriG18wBgzV+gbYfsbto2BpdkKiKxlUF0Xk3q\nDAJAOeT120YFl6K84Ak8afSYcamsch7JyEVFoL/B7R+SfY4DXbSteZsFkunqBS7NOZHY2kDCrJa5\nYLZY8mOeBDUg65LX+CuKdFjiPgsSzZZpf7Hg+n3VPuZ+pxDidYWcX4hMkfMLkSlyfiEyRc4vRKZ0\nutpfoMASyYMXfQpNmvRKdbCQjuEGX/UcO1/5boIAmALpld6K5KsDAAuCZib9oBzThOesWwxWlRtP\nz29T8CAcd56Xrm25/aOWr2CDlDZbLLntFYsGAmCbPKDmypD3K4lKEOXpW675WFGAkZX8nDXBeCDz\n72QOAaBo+Fjzoju/EJki5xciU+T8QmSKnF+ITJHzC5Epcn4hMqVTqc8MqOu05FFXQTmmCSlBFUh2\nw81IDgtKRlVB4Iml91mCB9rA1vn+nI/VkBJlAGAjnuyuIYEs45LLYb1AM23A52ri/JyVZbpfO+B2\nDEiwCgBsjHiI0XAcSLfk0Pp93qcflHOL5OWe82uuDZS+ikh6bRRwRQxxBfYIIbZCzi9Epsj5hcgU\nOb8QmSLnFyJT5PxCZMqWUp+ZPQLgtwCsuPtbZtsOAfgSgHsAPAvgQXe/OMe+UFbpIcuCyzyjNh2R\nVgZRVMY0HsTS0FKfyzVFkR7PCi5DteHna5BHruXHNqmCUmRNeh6jE10UQWmwMZ+PySSQRUn024Ga\nz32vWqZtmw2XvaoRz3XHovAGNT/mhZrPfdEGMiufKsDWaJOXaal4DJ7HcdxL2xiVebueee78fwXg\ngeu2PQzgMXe/F8Bjs7+FEK8htnR+d38cwEvXbX4PgEdnrx8F8N4dtksIscvc7G/+I+5+dvb6BUwr\n9gohXkNse8HP3R0gifUBmNkJMztlZqeGI55NRgjRLTfr/OfM7CgAzP5fYW9095Puftzdj/d7wTPw\nQohOuVnn/waAh2avHwLw9Z0xRwjRFfNIfV8A8A4Ah83sNICPAfgEgC+b2YcAPAfgwfmGa+GW/upf\n9YJopI30r4oySHDYNPwnxmTIxyrqJdpWkyirNkjOaEGkVxFExaHhx+ZBhF4BUibLA4mq4PvzDX5/\nCNJ3oiZzskxkrSncxiKQPusgySiGaftLkugUAIoySLoaSIQWRe5NeJmvhhz3eJ1fOwttWjq8kbv5\nls7v7h8gTe+8gXGEELcYesJPiEyR8wuRKXJ+ITJFzi9Epsj5hciUThN4AgUqluyy4rJG6Wl5pW55\njbkReOLM4YRHWJXtftrWELmsskCyC5J0lkEEVksiCAFgXPBor8KIjRMeNTnhD2hiPagZWJb82IzI\nZW3Nz3PQBKu4DLjZW+X9QHba51Jw6fwa8KCWYx0lhnUu9TE5uF3j10c1vC29r6CO46tsmvudQojX\nFXJ+ITJFzi9Epsj5hcgUOb8QmSLnFyJTupX6zAESFdVf4CFRlZEEh7ZB+3iQlHJtxKPYNusgYo7I\nb3XLI9U2Si4pWVTrjtTcAwALpLkaRGJDkEglGGu8yceqAvvHTbqtiKI3g+jCgXGp0oNaiWMS5Ti0\noAZhcYW2LVbcjgKBjS2/z46vpOfq8pDbOOqRWoiBRHw9uvMLkSlyfiEyRc4vRKbI+YXIFDm/EJnS\n6Wq/wzAieff2lTxwY6W9lNzeBoElpfG2YhyU1zKema5EOuDDEaxgB22F89XhID0hymB1u2X57Izv\nsB+U3SqHPIiobbn9vYW0umD9YO6dl+uqF7gS0Fviq+LNetrGcpOfl3YSKC1FcGKCFf0iCOK6Mkrb\nv0YUk+kO5y/LRXex7T0IIV6TyPmFyBQ5vxCZIucXIlPk/EJkipxfiEyZp1zXIwB+C8CKu79ltu3j\nAH4bwPnZ2z7q7t/cejiDEcmp6QXlpFgatkDtmJRcrlkP8upVgXzF6jE1VRT8wncXSYQeBJ54IBH2\nyHhtySW7zSBA52IwV5GNdZ2W9AaBVOZlWtIFgLrPZcA7Sp7L8acjIi1y07E24bJiFMS1GOT3K4Ig\ntMtraVmUyeIAYFQ63NnAnr8C8EBi+1+4+32zf3M4vhDiVmJL53f3xwG81IEtQogO2c5v/g+b2ZNm\n9oiZHdwxi4QQnXCzzv8ZAG8CcB+AswA+yd5oZifM7JSZnRoOg4QSQohOuSnnd/dz7t64ewvgswDu\nD9570t2Pu/vxfj+qzS6E6JKbcn4zO3rVn+8D8PTOmCOE6Ip5pL4vAHgHgMNmdhrAxwC8w8zuw1RX\neBbA78w1mjm8SGssg36Uwy/d1gafXRaU0LIgWmpCcuABQEvs6AUlkvwmAyet4ccWVNCCE2nOCx5N\nZ02gma4H8lUgVVaWnpOBLfA+gYRpQS7EA8f4/D+/lp6sl8dc6xvw6l9YWODzuK/Hv9kOAxlw/WLa\nlsUJ79OSqFUL8jFez5ZXprt/ILH5c3OPIIS4JdETfkJkipxfiEyR8wuRKXJ+ITJFzi9EpnSawNPc\nUJMkh8s1l4CKMi3lbDqPlDIiKQJAOeKSzGjEJaWWlJNqgoSaRZDU0YJIuyKQ3yZB8sYekXqKIKHp\nMJCUipbPYy8qr9VLy3aDkp9nFvE5heub+w9ybe72O9PS3LkX+Fgvn+djLQYy8XAfP5+rq/ycXSLl\nujDgc195en9EjU6iO78QmSLnFyJT5PxCZIqcX4hMkfMLkSlyfiEypVOpD3A4iZob9Hhk1kKVbhuP\ngmi6KNKu4RLK+iaXa0oibYWV+rgZcOcS4STQbOqCS1sNSTDpCKL6rvDLoAmSWY6rwEZyPq0fZM4M\n5gqsBiGA0ng04J13pPs1QT2+1VUu9555gbc1QVbQYsIPrq0Hye1VcGVNFtNjebGzCTyFEK9D5PxC\nZIqcX4hMkfMLkSlyfiEypdPVfgfQkNX+ts9X2XskN5qtr/PBWr5SGsRmYC1IL96Q1fme8VV7c77K\njiBAp2Y1ygBMgnxwIAFNVcPLoV0OkgK2wRJ8HSgI+0hOxn5QrqsJVrdHJbexFwRW7V8gbUev0D4X\nb6NN2NzkwWTDIVcdxpeC4x6ly421PX4tLi2n7SjLIB/jdejOL0SmyPmFyBQ5vxCZIucXIlPk/EJk\nipxfiEyZp1zX3QD+GsARTNW6k+7+aTM7BOBLAO7BtGTXg+5+MdwXHDXSgSJ1u0z7LfTTkl5jL9I+\nLO8fAFRBXr3RBpevWCmssgrKf1kQ7NFyaagN8vs1ZbDPJi2XefA5/1LDJdPVPpeO+kFgT9VPy1Rt\nuY/28TYteQFAbZu0rS24HWUvPVfLgaxY9rgs6g0/Zy9f5G0/u8Tnv6zJOat4UNUSCZwqbiCJ3zx3\n/gmAP3D3NwN4G4DfNbM3A3gYwGPufi+Ax2Z/CyFeI2zp/O5+1t2/P3u9CuAZAMcAvAfAo7O3PQrg\nvbtlpBBi57mh3/xmdg+AtwL4DoAj7n521vQCpj8LhBCvEeZ2fjNbAvAVAB9x98tXt7m7gyRWN7MT\nZnbKzE4Nh8GjrkKITpnL+c2sxtTxP+/uX51tPmdmR2ftRwGspPq6+0l3P+7ux/t9viAihOiWLZ3f\nzAzA5wA84+6fuqrpGwAemr1+CMDXd948IcRuMU9U368A+CCAp8zsidm2jwL4BIAvm9mHADwH4MGt\nd2XBkIGktESkkBd46Scf8/0VQTRaOw6i6UjpqspuLnJvbFxia4OoviLI/VdYWqYqLIj22uBjldEv\ntSASc1CnQ+OKICKxMh7FNikCWdS5JAZSAqwuuay4POFS6qThNv7vs9zGUWAiSVGJ5Yqfs9sOpn0i\nULhfPe5Wb3D3fwHPUfnO+YcSQtxK6Ak/ITJFzi9Epsj5hcgUOb8QmSLnFyJTOi/X1RJZzIPEjhVJ\nwlgGUU9tlDgziOgqPCirRD4rmyIoDRaUcKo9iB4LTo03vK0lMqYHcmS7zu3vB8lOi5pLfRU7tGCu\nJkEiVJaYdNoURMwRqc9LLsv1A7ns5dOXaduVS4G8HER+tuQ63n+Y97l9/8Hk9uoGtD7d+YXIFDm/\nEJki5xciU+T8QmSKnF+ITJHzC5EpHUt9hoLUjGuCSLv9dfozqlfyz64rQy55WMkP2ybcjqJIS1te\ncumwbga0bTNIxNkPkowWBZc4N9M5VTAMospGm3wsr3kU2yBIQMpye7aBEuUezGMQyQjj9rOmMogi\nvXiZ1/H7yXNcfrsSydVBQtYDB9LjHb5jifYp63QiVDNJfUKILZDzC5Epcn4hMkXOL0SmyPmFyJRO\nV/sdwJgEaLTBanSvSq+Yl70gG/CIr1J7UNIoWPhG2aaVCiJGAAAm4Qp2ELwTKAGb4MfWsM/zDb66\nPWx5KaxiwlfZfYEfW1Ok8ys2wTHXpBwaAFiQ368HnssRbfq418f8gvvfH3HF53KQ73C0yG1cXlqj\nbXcdSV9X+5b20z7O5JT5q3Xpzi9Ersj5hcgUOb8QmSLnFyJT5PxCZIqcX4hM2VLqM7O7Afw1piW4\nHcBJd/+0mX0cwG8DOD9760fd/ZtbjkiknsK43FSSfHBLA54Db2ONB2cYCX4BgH5QXosFpTQtL/2E\nIGCJ5ZcDgEkRJM9zLnG6byS3X3wxvR0ANoKcgE0/sJ/VmQJgxPxeEWlRQZmsoNxYCX7tTEjTUz/i\nfVY2eFtNZGcAuKO3StuO3kmbcGj/cnJ7rxfJxOm2G1D65tL5JwD+wN2/b2bLAL5nZt+atf2Fu//5\nDYwnhLhFmKdW31kAZ2evV83sGQDHdtswIcTuckO/+c3sHgBvBfCd2aYPm9mTZvaImaVzCQshbknm\ndn4zWwLwFQAfcffLAD4D4E0A7sP0m8EnSb8TZnbKzE4Nh/zxRyFEt8zl/GZWY+r4n3f3rwKAu59z\n98bdWwCfBXB/qq+7n3T34+5+vN8PsrEIITplS+c3MwPwOQDPuPunrtp+9Kq3vQ/A0ztvnhBit5hn\ntf9XAHwQwFNm9sRs20cBfMDM7sNU/nsWwO/MNaKnNaA2yj1GZKPblg/RLmfPX6JtVcslpSaQV3ok\n6qyK5EHnbdYGcl7J+116jktRz7+Y/mm1cpHLg23N7TCm2QGYrPO5unghbWPLFViUS1xWHDC9F8AF\nfqrx0zPp3HmXXubXQLnAj3np9gu07cjBdF49ADh4iMvB9SAdlVgG7smvjvmZZ7X/X5CWD7fW9IUQ\ntyx6wk+ITJHzC5Epcn4hMkXOL0SmyPmFyJRuy3U5ACJvRdFIRloP33Yb7XO65tFXwxEvnVQiiJiz\ndJsHfdrgwHoVl7baIJnlxpCPd/nSenJ700tvB4DFQM5rGm7HOChtdnolvc8zA27HUiBHji5yeXMt\nSMZpvbSkd+gwP67+MhfSDi9zyW7hYDo6DwAO9HmbkXJvFlw8RqTgIDftq9CdX4hMkfMLkSlyfiEy\nRc4vRKbI+YXIFDm/EJnSqdRXAOiTEL1xEMW2WaaluWIxiKI6yOucnVk5S9ssqK3npG0cSHbeBpFZ\ngZxHShoCAA7cyZNxWj/d0Sd8ficNj6iMEmdWm7wfm5P+Pi7B7gtuRedrHg64HCQ03V+kJTbfz+XB\npUW+v6U+T1jV6/N+RcVPaEFOdhFEuvIktEGk6PX7n/udQojXFXJ+ITJFzi9Epsj5hcgUOb8QmSLn\nFyJTOpX63AyTktXq4/3qJi03lSWX2O44ukTbLrzEJRSfBIaUablpUKQTMAJA2wYRZy2XhliiUwDo\n9fhn9qFD6eNumqBmgvP5qIL5mARRZxMi9fUqPldVw/dXkSSXANAENf4GZTpdfNXjaeQjya4s+FwV\ndSATBxKcIx0p2JT8nDlJ4Rko1a9Cd34hMkXOL0SmyPmFyBQ5vxCZIucXIlO2XO03swGAxwH0Z+//\nO3f/mJkdAvAlAPdgWq7rQXe/GO+sQVunc7gVQx7wgSZdqqmteYDLG5b5quzR23lwxrmzfIWV5fcr\nWl5KalLylWgPoneKoIRWvwpWo/vpVfGm4ad6HKwq90a8nzV8xXyjdzm5vQhknSJY7V+y4D4VrHBX\nRdrGuuTXxygov8aCuwCgDlb0g7geTMh1MLHAPUkwk93A/Xyedw4B/Lq7/xKm5bgfMLO3AXgYwGPu\nfi+Ax2Z/CyFeI2zp/D5lbfZnPfvnAN4D4NHZ9kcBvHdXLBRC7ApzfUcws3JWoXcFwLfc/TsAjrj7\nK4HxLwA4sks2CiF2gbmc390bd78PwF0A7jezt1zX7iBZBMzshJmdMrNTwyF/Ik8I0S03tNrv7i8D\n+CcADwA4Z2ZHAWD2/wrpc9Ldj7v78X7w2KQQolu2dH4zu93MDsxeLwB4F4D/BPANAA/N3vYQgK/v\nlpFCiJ1nnsCeowAeNbMS0w+LL7v735vZvwL4spl9CMBzAB7celcGK9JDBsoLHOmfC0HaP0zIOADw\nc8fuom2XNpJfYKa0aSNLEmQBAGVQP2kcBKRUQQmwIji2tkwHEpU1l+WisapAqpwY/xm3aGnJsbVA\n+gwuRzceIFUGOfycBeIE0lsvGKtBIBEGUl9bcSm7JN3KYH+jIp3XkgX8pNjS+d39SQBvTWy/AOCd\nc48khLil0BN+QmSKnF+ITJHzC5Epcn4hMkXOL0SmmAe54nZ8MLPzmMqCAHAYwIudDc6RHdciO67l\ntWbHG9399nl22KnzXzOw2Sl3P74ng8sO2SE79LVfiFyR8wuRKXvp/Cf3cOyrkR3XIjuu5XVrx579\n5hdC7C362i9EpuyJ85vZA2b2X2b2YzPbs9x/ZvasmT1lZk+Y2akOx33EzFbM7Omrth0ys2+Z2Y9m\n//Mso7trx8fN7MxsTp4ws3d3YMfdZvZPZvYfZvZDM/v92fZO5ySwo9M5MbOBmf2bmf1gZsefzLbv\n7Hy4e6f/AJQA/gfAmwD0APwAwJu7tmNmy7MADu/BuL8K4JcBPH3Vtj8D8PDs9cMA/nSP7Pg4gD/s\neD6OAvjl2etlAP8N4M1dz0lgR6dzgmk+4qXZ6xrAdwC8bafnYy/u/PcD+LG7/8TdRwC+iGky0Gxw\n98cBvHTd5s4TohI7Osfdz7r792evVwE8A+AYOp6TwI5O8Sm7njR3L5z/GICfXfX3aezBBM9wAN82\ns++Z2Yk9suEVbqWEqB82sydnPwt2/efH1ZjZPZjmj9jTJLHX2QF0PCddJM3NfcHv7T5NTPqbAH7X\nzH51rw0C4oSoHfAZTH+S3QfgLIBPdjWwmS0B+AqAj7j7NVU/upyThB2dz4lvI2nuvOyF858BcPdV\nf98129Y57n5m9v8KgK9h+pNkr5grIepu4+7nZhdeC+Cz6GhOzKzG1OE+7+5fnW3ufE5SduzVnMzG\nvuGkufOyF87/XQD3mtkvmFkPwPsxTQbaKWa2z8yWX3kN4DcAPB332lVuiYSor1xcM96HDubEzAzA\n5wA84+6fuqqp0zlhdnQ9J50lze1qBfO61cx3Y7qS+j8A/miPbHgTpkrDDwD8sEs7AHwB06+PY0zX\nPD4E4A2Ylj37EYBvAzi0R3b8DYCnADw5u9iOdmDH2zH9CvskgCdm/97d9ZwEdnQ6JwB+EcC/z8Z7\nGsAfz7bv6HzoCT8hMiX3BT8hskXOL0SmyPmFyBQ5vxCZIucXIlPk/EJkipxfiEyR8wuRKf8Hv1Gt\nFZnf3nEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1185ce790>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Show sample image\n",
    "image_index = 0\n",
    "\n",
    "plt.imshow(X_train[:, :, :, image_index])\n",
    "print('Label: {0}'.format(y_train[image_index]))\n",
    "plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Path to training/test data\n",
    "train_path = PATH + 'training/'\n",
    "test_path = PATH + 'testing/'\n",
    "\n",
    "train_count = int(X_train.shape[3])\n",
    "test_count = int(X_test.shape[3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def preprocess(X, y, num_data, target_path, data_type, num_labels=10, num_random=6, display_step=1000):\n",
    "    # Initialise label count\n",
    "    label_count = {}\n",
    "    for i in range(num_labels):\n",
    "        label_count[i] = 0\n",
    "    \n",
    "    for i in range(num_data):\n",
    "        label = y[i][0]\n",
    "        random = binascii.hexlify(os.urandom(num_random // 2)).decode()\n",
    "\n",
    "        count = 0\n",
    "        if label in label_count:\n",
    "            count = label_count[label]\n",
    "            count += 1\n",
    "        label_count[label] = count\n",
    "\n",
    "        filename = '%s_%s_%i_%i.png' % (data_type, random, label, label_count[label])\n",
    "        image = Image.fromarray(X_train[:, :, :, i])\n",
    "#         image = image.convert('LA') # grayscale\n",
    "        image.save(target_path + filename)\n",
    "\n",
    "        if i % display_step == 0 or i == 1:\n",
    "            print('Step #%i: saved %s' % (i, filename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step #0: saved train_9212a6_1_1.png\n"
     ]
    }
   ],
   "source": [
    "preprocess(X_train, y_train, 1, train_path, 'train')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step #0: saved test_a334e6_5_1.png\n"
     ]
    }
   ],
   "source": [
    "preprocess(X_test, y_test, 1, test_path, 'test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
